Report with Evidently

%load_ext kedro.ipython
[09/05/24 21:27:53] INFO     Using                                                                  __init__.py:249
                             'c:\Users\Admin\miniconda3\envs\recsys\Lib\site-packages\kedro\framewo                
                             rk\project\rich_logging.yml' as logging configuration.                                
[09/05/24 21:27:54] INFO     Registered line magic '%reload_kedro'                                   __init__.py:58
                    INFO     Registered line magic '%load_node'                                      __init__.py:60
                    INFO     Resolved project path as:                                              __init__.py:175
                             c:\Users\Admin\Desktop\Semester_7\MLOps\AI-839\srinivasan-ai-839.                     
                             To set a different path, run '%reload_kedro <project_root>'                           
[09/05/24 21:27:55] WARNING  c:\Users\Admin\miniconda3\envs\recsys\Lib\site-packages\kedro_viz\__in warnings.py:112
                             it__.py:13: KedroVizPythonVersionWarning: Please be advised that Kedro                
                             Viz is not yet fully                                                                  
                                     compatible with the Python version you are currently using.                   
                               warnings.warn(                                                                      
                                                                                                                   
                    INFO     Kedro is sending anonymous usage data with the sole purpose of improving plugin.py:233
                             the product. No personal data or IP addresses are stored on our side. If              
                             you want to opt out, set the `KEDRO_DISABLE_TELEMETRY` or `DO_NOT_TRACK`              
                             environment variables, or create a `.telemetry` file in the current                   
                             working directory with the contents `consent: false`. Read more at                    
                             https://docs.kedro.org/en/stable/configuration/telemetry.html                         
[09/05/24 21:27:56] INFO     Kedro project Srinivasan-ai-839                                        __init__.py:141
                    INFO     Defined global variable 'context', 'session', 'catalog' and            __init__.py:142
                             'pipelines'                                                                           
[09/05/24 21:27:57] INFO     Registered line magic 'run_viz'                                        __init__.py:148
# `catalog` is the global defined by the kedro.ipython extension loaded above.
# Load the "dataset_id_214" catalog entry (logged as a CSVDataset); the result is
# presumably a pandas DataFrame, since it is later used via .sample() and .head().
dataset = catalog.load("dataset_id_214")
[09/05/24 21:49:58] INFO     Loading data from dataset_id_214 (CSVDataset)...                   data_catalog.py:539
import pandas as pd
import numpy as np

from sklearn.datasets import fetch_california_housing

from evidently import ColumnMapping

from evidently.report import Report
from evidently.metrics.base_metric import generate_column_metrics
from evidently.metric_preset import (
    DataDriftPreset,
    TargetDriftPreset,
    DataQualityPreset,
    RegressionPreset,
)
from evidently.metrics import *

from evidently.test_suite import TestSuite
from evidently.tests.base_test import generate_column_tests
from evidently.test_preset import (
    DataStabilityTestPreset,
    NoTargetPerformanceTestPreset,
    RegressionTestPreset,
)
from evidently.tests import *
import warnings

# Silence every warning for the rest of the session so that library warnings do
# not clutter the notebook output. One catch-all "ignore" filter is sufficient:
# with default arguments, warnings.simplefilter("ignore") would only install the
# very same filter a second time.
warnings.filterwarnings("ignore")
# Optional experiment (disabled): inject label noise into the target column `y`.
# noise_probability = 0.1  # Probability of flipping each label (10% noise)
#
# # Draw one uniform random value in [0, 1) per row; rows below noise_probability are flipped
# flip_mask = np.random.rand(dataset.shape[0]) < noise_probability
#
# # Flip the boolean values wherever flip_mask is True
# dataset['y'] = np.where(flip_mask, ~dataset['y'], dataset['y'])
# # Alternative: add Gaussian noise to the target instead of flipping it
# # dataset['y'] = dataset['y'] + np.random.normal(0,1,dataset.shape[0])
# Draw two 50-row samples from the loaded dataset to serve as the "reference" and
# "current" inputs of the drift report below.
# Fixed seeds make the samples (and therefore the report) reproducible across
# notebook runs; distinct seeds keep the two draws from being tied to each other.
# NOTE: the reference draw is without replacement, so `dataset` must contain at
# least 50 rows or pandas raises ValueError.
reference = dataset.sample(n=50, replace=False, random_state=0)
# `current` is drawn with replacement, so it may contain duplicated rows.
current = dataset.sample(n=50, replace=True, random_state=1)
# dataset.head()
reference.head()

checking_status duration credit_history purpose credit_amount savings_status employment installment_commitment personal_status other_parties ... X_5 X_6 X_7 X_8 X_9 X_10 X_11 X_12 X_13 y
2 no checking 24 critical/other existing credit radio/tv 5103.0 <100 <1 3 male mar/wid none ... 0.135713 0.185599 0.992303 0.778875 0.236489 0.317112 0.751187 0.080140 0.272325 True
53 >=200 18 existing paid furniture/equipment 3049.0 <100 <1 1 female div/dep/mar none ... 0.847548 0.809026 0.235335 0.986156 0.187755 0.652575 0.937840 0.649408 0.557902 True
92 >=200 36 existing paid radio/tv 4473.0 <100 >=7 4 male single none ... 0.314592 0.653284 0.703138 0.151543 0.624606 0.771431 0.813860 0.958305 0.681945 True
81 no checking 36 critical/other existing credit furniture/equipment 7127.0 <100 <1 2 female div/dep/mar none ... 0.755093 0.773156 0.326619 0.680981 0.180544 0.342531 0.790025 0.660239 0.328569 False
27 0<=X<200 15 existing paid repairs 1308.0 <100 >=7 4 male single none ... 0.321287 0.562743 0.384324 0.404911 0.697955 0.207398 0.259863 0.708330 0.015333 True

5 rows × 35 columns

# Build an Evidently report that contains only the data-drift preset, then
# evaluate the `current` sample against the `reference` sample.
report = Report(
    metrics=[
        DataDriftPreset(),
    ],
)
report.run(
    current_data=current,
    reference_data=reference,
)
# Bare expression as the last statement of the cell so the notebook renders it.
report